{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Standard library\n",
    "import os\n",
    "import time\n",
    "\n",
    "# Third-party: numerics, plotting, hyperparameter search\n",
    "import numpy as np\n",
    "import optuna\n",
    "from matplotlib import pyplot as plt\n",
    "\n",
    "# Third-party: NES emulator wrappers and the Super Mario Bros environment\n",
    "from nes_py.wrappers import JoypadSpace\n",
    "import gym_super_mario_bros\n",
    "from gym_super_mario_bros.actions import SIMPLE_MOVEMENT\n",
    "from gym.wrappers import GrayScaleObservation\n",
    "\n",
    "# Third-party: Stable-Baselines3\n",
    "from stable_baselines3 import PPO\n",
    "from stable_baselines3.common.callbacks import BaseCallback\n",
    "from stable_baselines3.common.evaluation import evaluate_policy\n",
    "from stable_baselines3.common.monitor import Monitor\n",
    "from stable_baselines3.common.results_plotter import load_results, ts2xy\n",
    "from stable_baselines3.common.vec_env import DummyVecEnv, VecFrameStack\n",
    "\n",
    "# Directory handed to Monitor below; created up front so it exists before use.\n",
    "log_dir = './log_dir2/'\n",
    "os.makedirs(log_dir, exist_ok=True)\n",
    "\n",
    "# Module-level environment, built as a chain of wrappers (innermost first):\n",
    "# raw game -> JoypadSpace with the SIMPLE_MOVEMENT action list -> Monitor ->\n",
    "# grayscale frames (channel axis kept) -> vectorized env -> stack of 4 frames.\n",
    "# NOTE(review): optimize_agent below builds its own environment with the same\n",
    "# chain, so this global one is only needed by cells that use `env` directly.\n",
    "env = gym_super_mario_bros.make('SuperMarioBros-v0')\n",
    "env = JoypadSpace(env, SIMPLE_MOVEMENT)\n",
    "env = Monitor(env, log_dir)\n",
    "env = GrayScaleObservation(env, keep_dim=True)\n",
    "# The factory reads the global `env`; this presumably relies on DummyVecEnv\n",
    "# calling it during construction, before `env` is rebound -- verify.\n",
    "env = DummyVecEnv([lambda: env])\n",
    "env = VecFrameStack(env, 4, channels_order='last')\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# PPO主要超参数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "def optimize_ppo(trial): \n",
    "    return {\n",
    "        'n_steps':trial.suggest_int('n_steps', 2048, 8192),\n",
    "        'gamma':trial.suggest_loguniform('gamma', 0.8, 0.9999),\n",
    "        'learning_rate':trial.suggest_loguniform('learning_rate', 1e-5, 1e-4),\n",
    "        'clip_range':trial.suggest_uniform('clip_range', 0.1, 0.4),\n",
    "        'gae_lambda':trial.suggest_uniform('gae_lambda', 0.8, 0.99)\n",
    "    }"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 超参数调优"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "def optimize_agent(trial, total_timesteps=200000, n_eval_episodes=5,\n",
    "                   opt_dir=r'F:\\\\300_RL_DEMO\\\\220_Super-Mario-2\\\\',\n",
    "                   log_dir='./log_dir2/', tensorboard_log=r'./logs/'):\n",
    "    \"\"\"Optuna objective: train PPO on SuperMarioBros-v0 and return its mean evaluation reward.\n",
    "\n",
    "    Args:\n",
    "        trial: Optuna trial; supplies the hyperparameters through optimize_ppo,\n",
    "            and its number is used in the saved model's file name.\n",
    "        total_timesteps: training budget passed to model.learn.\n",
    "        n_eval_episodes: number of episodes passed to evaluate_policy.\n",
    "        opt_dir: directory the trained model is saved into.\n",
    "        log_dir: directory given to Monitor (created if missing).\n",
    "        tensorboard_log: TensorBoard log directory given to PPO.\n",
    "\n",
    "    Returns:\n",
    "        The mean reward from evaluate_policy, or float('nan') when environment\n",
    "        setup, training or evaluation raised. Optuna records a NaN objective as a\n",
    "        failed trial and keeps the study running, so a crash is no longer reported\n",
    "        as a reward of -1000 (genuine rewards can be lower than that).\n",
    "    \"\"\"\n",
    "    import traceback  # local import: only used to report failures\n",
    "\n",
    "    env = None\n",
    "    try:\n",
    "        os.makedirs(log_dir, exist_ok=True)\n",
    "\n",
    "        # Same wrapper chain as the module-level environment, innermost first.\n",
    "        env = gym_super_mario_bros.make('SuperMarioBros-v0')\n",
    "        env = JoypadSpace(env, SIMPLE_MOVEMENT)\n",
    "        env = Monitor(env, log_dir)\n",
    "        env = GrayScaleObservation(env, keep_dim=True)\n",
    "        # Bind the single env to its own name so the factory does not depend on\n",
    "        # when `env` gets rebound to the vectorized wrapper.\n",
    "        single_env = env\n",
    "        env = DummyVecEnv([lambda: single_env])\n",
    "        env = VecFrameStack(env, 4, channels_order='last')\n",
    "\n",
    "        model_params = optimize_ppo(trial)\n",
    "        model = PPO('CnnPolicy', env, verbose=0, tensorboard_log=tensorboard_log, **model_params)\n",
    "        model.learn(total_timesteps=total_timesteps)\n",
    "\n",
    "        mean_reward, _ = evaluate_policy(model, env, n_eval_episodes=n_eval_episodes)\n",
    "    except Exception:\n",
    "        # Show why the trial failed instead of silently swallowing the error.\n",
    "        print('Trial {} failed:'.format(trial.number))\n",
    "        traceback.print_exc()\n",
    "        return float('nan')\n",
    "    finally:\n",
    "        # Always release the environment, including when setup or training failed.\n",
    "        if env is not None:\n",
    "            try:\n",
    "                env.close()\n",
    "            except Exception:\n",
    "                traceback.print_exc()\n",
    "\n",
    "    # A failed save must not throw away the reward of a completed training run.\n",
    "    try:\n",
    "        save_path = os.path.join(opt_dir, 'trial_{}_best_model'.format(trial.number))\n",
    "        model.save(save_path)\n",
    "    except Exception:\n",
    "        print('Trial {}: could not save the model:'.format(trial.number))\n",
    "        traceback.print_exc()\n",
    "\n",
    "    return mean_reward"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\u001b[32m[I 2022-03-05 18:34:28,442]\u001b[0m A new study created in memory with name: no-name-f86b12cb-1ae9-42ed-99ea-686b25b058bb\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 4613`, after every 72 untruncated mini-batches, there will be a truncated mini-batch of size 5\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=4613 and n_envs=1)\n",
      "  warnings.warn(\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\gym_super_mario_bros\\smb_env.py:148: RuntimeWarning: overflow encountered in ubyte_scalars\n",
      "  return (self.ram[0x86] - self.ram[0x071c]) % 256\n",
      "\u001b[32m[I 2022-03-05 19:29:59,963]\u001b[0m Trial 0 finished with value: 737.0 and parameters: {'n_steps': 4613, 'gamma': 0.897637642087022, 'learning_rate': 1.132279703505345e-05, 'clip_range': 0.24905728408479416, 'gae_lambda': 0.9265479662259783}. Best is trial 0 with value: 737.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 6897`, after every 107 untruncated mini-batches, there will be a truncated mini-batch of size 49\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=6897 and n_envs=1)\n",
      "  warnings.warn(\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\gym_super_mario_bros\\smb_env.py:148: RuntimeWarning: overflow encountered in ubyte_scalars\n",
      "  return (self.ram[0x86] - self.ram[0x071c]) % 256\n",
      "\u001b[32m[I 2022-03-05 20:26:14,200]\u001b[0m Trial 1 finished with value: 741.0 and parameters: {'n_steps': 6897, 'gamma': 0.8827508549180133, 'learning_rate': 1.662967178786175e-05, 'clip_range': 0.21117729231503277, 'gae_lambda': 0.9468593404751742}. Best is trial 1 with value: 741.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 5247`, after every 81 untruncated mini-batches, there will be a truncated mini-batch of size 63\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=5247 and n_envs=1)\n",
      "  warnings.warn(\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\gym_super_mario_bros\\smb_env.py:148: RuntimeWarning: overflow encountered in ubyte_scalars\n",
      "  return (self.ram[0x86] - self.ram[0x071c]) % 256\n",
      "\u001b[32m[I 2022-03-05 21:20:07,516]\u001b[0m Trial 2 finished with value: 726.0 and parameters: {'n_steps': 5247, 'gamma': 0.8751392158189091, 'learning_rate': 8.876668716969671e-05, 'clip_range': 0.18340753684673297, 'gae_lambda': 0.9751594117960503}. Best is trial 1 with value: 741.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 6244`, after every 97 untruncated mini-batches, there will be a truncated mini-batch of size 36\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=6244 and n_envs=1)\n",
      "  warnings.warn(\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\gym_super_mario_bros\\smb_env.py:148: RuntimeWarning: overflow encountered in ubyte_scalars\n",
      "  return (self.ram[0x86] - self.ram[0x071c]) % 256\n",
      "\u001b[32m[I 2022-03-05 22:16:01,829]\u001b[0m Trial 3 finished with value: 680.0 and parameters: {'n_steps': 6244, 'gamma': 0.988247241779108, 'learning_rate': 1.703240104294193e-05, 'clip_range': 0.11513336674332397, 'gae_lambda': 0.8477371508488406}. Best is trial 1 with value: 741.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 5075`, after every 79 untruncated mini-batches, there will be a truncated mini-batch of size 19\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=5075 and n_envs=1)\n",
      "  warnings.warn(\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\gym_super_mario_bros\\smb_env.py:148: RuntimeWarning: overflow encountered in ubyte_scalars\n",
      "  return (self.ram[0x86] - self.ram[0x071c]) % 256\n",
      "\u001b[32m[I 2022-03-05 23:11:12,661]\u001b[0m Trial 4 finished with value: 1108.0 and parameters: {'n_steps': 5075, 'gamma': 0.9434048013157359, 'learning_rate': 6.876751882520255e-05, 'clip_range': 0.29653339255703803, 'gae_lambda': 0.9165646526221465}. Best is trial 4 with value: 1108.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 3824`, after every 59 untruncated mini-batches, there will be a truncated mini-batch of size 48\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=3824 and n_envs=1)\n",
      "  warnings.warn(\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\gym_super_mario_bros\\smb_env.py:148: RuntimeWarning: overflow encountered in ubyte_scalars\n",
      "  return (self.ram[0x86] - self.ram[0x071c]) % 256\n",
      "\u001b[32m[I 2022-03-06 00:04:42,065]\u001b[0m Trial 5 finished with value: 680.0 and parameters: {'n_steps': 3824, 'gamma': 0.9166065354226135, 'learning_rate': 2.376920494698484e-05, 'clip_range': 0.25022961821020706, 'gae_lambda': 0.8460392447940066}. Best is trial 4 with value: 1108.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 3242`, after every 50 untruncated mini-batches, there will be a truncated mini-batch of size 42\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=3242 and n_envs=1)\n",
      "  warnings.warn(\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\gym_super_mario_bros\\smb_env.py:148: RuntimeWarning: overflow encountered in ubyte_scalars\n",
      "  return (self.ram[0x86] - self.ram[0x071c]) % 256\n",
      "\u001b[32m[I 2022-03-06 00:57:53,334]\u001b[0m Trial 6 finished with value: 737.0 and parameters: {'n_steps': 3242, 'gamma': 0.9142904652627202, 'learning_rate': 1.179458486922465e-05, 'clip_range': 0.10418247494503566, 'gae_lambda': 0.9548389376323592}. Best is trial 4 with value: 1108.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 3621`, after every 56 untruncated mini-batches, there will be a truncated mini-batch of size 37\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=3621 and n_envs=1)\n",
      "  warnings.warn(\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\gym_super_mario_bros\\smb_env.py:148: RuntimeWarning: overflow encountered in ubyte_scalars\n",
      "  return (self.ram[0x86] - self.ram[0x071c]) % 256\n",
      "\u001b[32m[I 2022-03-06 02:02:47,923]\u001b[0m Trial 7 finished with value: 402.0 and parameters: {'n_steps': 3621, 'gamma': 0.932039804388231, 'learning_rate': 3.196712708481807e-05, 'clip_range': 0.15649672641966508, 'gae_lambda': 0.9065213802284359}. Best is trial 4 with value: 1108.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 6602`, after every 103 untruncated mini-batches, there will be a truncated mini-batch of size 10\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=6602 and n_envs=1)\n",
      "  warnings.warn(\n",
      "\u001b[32m[I 2022-03-06 02:02:49,354]\u001b[0m Trial 8 finished with value: -1000.0 and parameters: {'n_steps': 6602, 'gamma': 0.8181386281547305, 'learning_rate': 4.3440283846169765e-05, 'clip_range': 0.3602695748934023, 'gae_lambda': 0.8926590155760882}. Best is trial 4 with value: 1108.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 6820`, after every 106 untruncated mini-batches, there will be a truncated mini-batch of size 36\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=6820 and n_envs=1)\n",
      "  warnings.warn(\n",
      "\u001b[32m[I 2022-03-06 02:02:49,739]\u001b[0m Trial 9 finished with value: -1000.0 and parameters: {'n_steps': 6820, 'gamma': 0.9262140004892221, 'learning_rate': 1.6661248991136217e-05, 'clip_range': 0.3973329888024071, 'gae_lambda': 0.9560055673674822}. Best is trial 4 with value: 1108.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 2211`, after every 34 untruncated mini-batches, there will be a truncated mini-batch of size 35\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=2211 and n_envs=1)\n",
      "  warnings.warn(\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\gym_super_mario_bros\\smb_env.py:148: RuntimeWarning: overflow encountered in ubyte_scalars\n",
      "  return (self.ram[0x86] - self.ram[0x071c]) % 256\n",
      "\u001b[32m[I 2022-03-06 03:08:41,504]\u001b[0m Trial 10 finished with value: -1246.0 and parameters: {'n_steps': 2211, 'gamma': 0.9852992446932544, 'learning_rate': 9.12098310664667e-05, 'clip_range': 0.3181106650492799, 'gae_lambda': 0.803344410676354}. Best is trial 4 with value: 1108.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 8179`, after every 127 untruncated mini-batches, there will be a truncated mini-batch of size 51\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=8179 and n_envs=1)\n",
      "  warnings.warn(\n",
      "\u001b[32m[I 2022-03-06 03:08:43,233]\u001b[0m Trial 11 finished with value: -1000.0 and parameters: {'n_steps': 8179, 'gamma': 0.8612755091478944, 'learning_rate': 5.543478474730427e-05, 'clip_range': 0.2594695900931689, 'gae_lambda': 0.9302140987878883}. Best is trial 4 with value: 1108.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 5494`, after every 85 untruncated mini-batches, there will be a truncated mini-batch of size 54\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=5494 and n_envs=1)\n",
      "  warnings.warn(\n",
      "\u001b[32m[I 2022-03-06 03:08:43,637]\u001b[0m Trial 12 finished with value: -1000.0 and parameters: {'n_steps': 5494, 'gamma': 0.8406303228393, 'learning_rate': 4.608155953558372e-05, 'clip_range': 0.2872531262043011, 'gae_lambda': 0.8819928426294149}. Best is trial 4 with value: 1108.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 7943`, after every 124 untruncated mini-batches, there will be a truncated mini-batch of size 7\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=7943 and n_envs=1)\n",
      "  warnings.warn(\n",
      "\u001b[32m[I 2022-03-06 03:08:43,980]\u001b[0m Trial 13 finished with value: -1000.0 and parameters: {'n_steps': 7943, 'gamma': 0.9564835038885848, 'learning_rate': 2.7640406248913324e-05, 'clip_range': 0.20177424494018315, 'gae_lambda': 0.9318582021329859}. Best is trial 4 with value: 1108.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 7149`, after every 111 untruncated mini-batches, there will be a truncated mini-batch of size 45\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=7149 and n_envs=1)\n",
      "  warnings.warn(\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\gym_super_mario_bros\\smb_env.py:148: RuntimeWarning: overflow encountered in ubyte_scalars\n",
      "  return (self.ram[0x86] - self.ram[0x071c]) % 256\n",
      "\u001b[32m[I 2022-03-06 04:04:57,152]\u001b[0m Trial 14 finished with value: 2567.0 and parameters: {'n_steps': 7149, 'gamma': 0.8692871366327747, 'learning_rate': 6.442559213980066e-05, 'clip_range': 0.31688308594665404, 'gae_lambda': 0.8710254680014865}. Best is trial 14 with value: 2567.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 4509`, after every 70 untruncated mini-batches, there will be a truncated mini-batch of size 29\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=4509 and n_envs=1)\n",
      "  warnings.warn(\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\gym_super_mario_bros\\smb_env.py:148: RuntimeWarning: overflow encountered in ubyte_scalars\n",
      "  return (self.ram[0x86] - self.ram[0x071c]) % 256\n",
      "\u001b[32m[I 2022-03-06 05:10:26,873]\u001b[0m Trial 15 finished with value: 402.0 and parameters: {'n_steps': 4509, 'gamma': 0.9582982775319306, 'learning_rate': 6.480431134517855e-05, 'clip_range': 0.3240909269199512, 'gae_lambda': 0.8663149800520322}. Best is trial 14 with value: 2567.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 6014`, after every 93 untruncated mini-batches, there will be a truncated mini-batch of size 62\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=6014 and n_envs=1)\n",
      "  warnings.warn(\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\gym_super_mario_bros\\smb_env.py:148: RuntimeWarning: overflow encountered in ubyte_scalars\n",
      "  return (self.ram[0x86] - self.ram[0x071c]) % 256\n",
      "\u001b[32m[I 2022-03-06 06:09:01,530]\u001b[0m Trial 16 finished with value: 1455.0 and parameters: {'n_steps': 6014, 'gamma': 0.8435431830168784, 'learning_rate': 6.449666763925046e-05, 'clip_range': 0.3268859684160919, 'gae_lambda': 0.8274230907757613}. Best is trial 14 with value: 2567.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 7451`, after every 116 untruncated mini-batches, there will be a truncated mini-batch of size 27\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=7451 and n_envs=1)\n",
      "  warnings.warn(\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\gym_super_mario_bros\\smb_env.py:148: RuntimeWarning: overflow encountered in ubyte_scalars\n",
      "  return (self.ram[0x86] - self.ram[0x071c]) % 256\n",
      "\u001b[32m[I 2022-03-06 07:08:44,825]\u001b[0m Trial 17 finished with value: 1822.0 and parameters: {'n_steps': 7451, 'gamma': 0.8042228099361809, 'learning_rate': 3.8773133792798885e-05, 'clip_range': 0.3573465888473167, 'gae_lambda': 0.8020941461494714}. Best is trial 14 with value: 2567.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 7453`, after every 116 untruncated mini-batches, there will be a truncated mini-batch of size 29\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=7453 and n_envs=1)\n",
      "  warnings.warn(\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\gym_super_mario_bros\\smb_env.py:148: RuntimeWarning: overflow encountered in ubyte_scalars\n",
      "  return (self.ram[0x86] - self.ram[0x071c]) % 256\n",
      "\u001b[32m[I 2022-03-06 08:13:30,376]\u001b[0m Trial 18 finished with value: 73.0 and parameters: {'n_steps': 7453, 'gamma': 0.8073761993893441, 'learning_rate': 3.726352817312057e-05, 'clip_range': 0.3979344126597833, 'gae_lambda': 0.8013869854112057}. Best is trial 14 with value: 2567.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 7455`, after every 116 untruncated mini-batches, there will be a truncated mini-batch of size 31\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=7455 and n_envs=1)\n",
      "  warnings.warn(\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\gym_super_mario_bros\\smb_env.py:148: RuntimeWarning: overflow encountered in ubyte_scalars\n",
      "  return (self.ram[0x86] - self.ram[0x071c]) % 256\n",
      "\u001b[32m[I 2022-03-06 09:07:38,596]\u001b[0m Trial 19 finished with value: 705.0 and parameters: {'n_steps': 7455, 'gamma': 0.8285493883311458, 'learning_rate': 4.712634118508695e-05, 'clip_range': 0.3526727369305535, 'gae_lambda': 0.8264917611952299}. Best is trial 14 with value: 2567.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 7439`, after every 116 untruncated mini-batches, there will be a truncated mini-batch of size 15\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=7439 and n_envs=1)\n",
      "  warnings.warn(\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\gym_super_mario_bros\\smb_env.py:148: RuntimeWarning: overflow encountered in ubyte_scalars\n",
      "  return (self.ram[0x86] - self.ram[0x071c]) % 256\n",
      "\u001b[32m[I 2022-03-06 10:07:50,758]\u001b[0m Trial 20 finished with value: 740.0 and parameters: {'n_steps': 7439, 'gamma': 0.8575393673052868, 'learning_rate': 2.3388213506201787e-05, 'clip_range': 0.3638098627065032, 'gae_lambda': 0.8698228921915958}. Best is trial 14 with value: 2567.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 6040`, after every 94 untruncated mini-batches, there will be a truncated mini-batch of size 24\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=6040 and n_envs=1)\n",
      "  warnings.warn(\n",
      "\u001b[32m[I 2022-03-06 10:07:58,085]\u001b[0m Trial 21 finished with value: -1000.0 and parameters: {'n_steps': 6040, 'gamma': 0.8407654198004241, 'learning_rate': 6.247267301214668e-05, 'clip_range': 0.33793841539815983, 'gae_lambda': 0.8281022658188876}. Best is trial 14 with value: 2567.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 6003`, after every 93 untruncated mini-batches, there will be a truncated mini-batch of size 51\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=6003 and n_envs=1)\n",
      "  warnings.warn(\n",
      "\u001b[32m[I 2022-03-06 10:08:00,298]\u001b[0m Trial 22 finished with value: -1000.0 and parameters: {'n_steps': 6003, 'gamma': 0.8059997155503917, 'learning_rate': 7.87845341477969e-05, 'clip_range': 0.29439348165229284, 'gae_lambda': 0.8227742070745756}. Best is trial 14 with value: 2567.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 7130`, after every 111 untruncated mini-batches, there will be a truncated mini-batch of size 26\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=7130 and n_envs=1)\n",
      "  warnings.warn(\n",
      "\u001b[32m[I 2022-03-06 10:08:00,890]\u001b[0m Trial 23 finished with value: -1000.0 and parameters: {'n_steps': 7130, 'gamma': 0.8478702908704635, 'learning_rate': 5.050046436054904e-05, 'clip_range': 0.36328835328005316, 'gae_lambda': 0.8437762884632954}. Best is trial 14 with value: 2567.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 6422`, after every 100 untruncated mini-batches, there will be a truncated mini-batch of size 22\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=6422 and n_envs=1)\n",
      "  warnings.warn(\n",
      "\u001b[32m[I 2022-03-06 10:08:01,470]\u001b[0m Trial 24 finished with value: -1000.0 and parameters: {'n_steps': 6422, 'gamma': 0.8246905041699544, 'learning_rate': 3.44170442507643e-05, 'clip_range': 0.3149144306243738, 'gae_lambda': 0.8179959301164946}. Best is trial 14 with value: 2567.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 5860`, after every 91 untruncated mini-batches, there will be a truncated mini-batch of size 36\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=5860 and n_envs=1)\n",
      "  warnings.warn(\n",
      "\u001b[32m[I 2022-03-06 10:08:01,943]\u001b[0m Trial 25 finished with value: -1000.0 and parameters: {'n_steps': 5860, 'gamma': 0.8009530153179111, 'learning_rate': 4.1210218242129155e-05, 'clip_range': 0.3788556704616489, 'gae_lambda': 0.8611304482561349}. Best is trial 14 with value: 2567.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 7847`, after every 122 untruncated mini-batches, there will be a truncated mini-batch of size 39\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=7847 and n_envs=1)\n",
      "  warnings.warn(\n",
      "\u001b[32m[I 2022-03-06 10:08:02,334]\u001b[0m Trial 26 finished with value: -1000.0 and parameters: {'n_steps': 7847, 'gamma': 0.8701457607715932, 'learning_rate': 9.951473566699334e-05, 'clip_range': 0.2670128248151571, 'gae_lambda': 0.8143470380252991}. Best is trial 14 with value: 2567.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 7070`, after every 110 untruncated mini-batches, there will be a truncated mini-batch of size 30\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=7070 and n_envs=1)\n",
      "  warnings.warn(\n",
      "\u001b[32m[I 2022-03-06 10:08:02,766]\u001b[0m Trial 27 finished with value: -1000.0 and parameters: {'n_steps': 7070, 'gamma': 0.8309617488297305, 'learning_rate': 7.977477083642515e-05, 'clip_range': 0.338106282382525, 'gae_lambda': 0.8443447538755366}. Best is trial 14 with value: 2567.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 5531`, after every 86 untruncated mini-batches, there will be a truncated mini-batch of size 27\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=5531 and n_envs=1)\n",
      "  warnings.warn(\n",
      "\u001b[32m[I 2022-03-06 10:08:04,894]\u001b[0m Trial 28 finished with value: -1000.0 and parameters: {'n_steps': 5531, 'gamma': 0.8547933285517536, 'learning_rate': 5.750984934282074e-05, 'clip_range': 0.28237907222211833, 'gae_lambda': 0.8839824175858851}. Best is trial 14 with value: 2567.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 4553`, after every 71 untruncated mini-batches, there will be a truncated mini-batch of size 9\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=4553 and n_envs=1)\n",
      "  warnings.warn(\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\gym_super_mario_bros\\smb_env.py:148: RuntimeWarning: overflow encountered in ubyte_scalars\n",
      "  return (self.ram[0x86] - self.ram[0x071c]) % 256\n",
      "\u001b[32m[I 2022-03-06 11:06:51,349]\u001b[0m Trial 29 finished with value: 694.0 and parameters: {'n_steps': 4553, 'gamma': 0.8855416565776462, 'learning_rate': 7.394572418295172e-05, 'clip_range': 0.22507503074817942, 'gae_lambda': 0.8342167241413327}. Best is trial 14 with value: 2567.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 6602`, after every 103 untruncated mini-batches, there will be a truncated mini-batch of size 10\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=6602 and n_envs=1)\n",
      "  warnings.warn(\n",
      "\u001b[32m[I 2022-03-06 11:06:53,589]\u001b[0m Trial 30 finished with value: -1000.0 and parameters: {'n_steps': 6602, 'gamma': 0.9037491239570644, 'learning_rate': 5.390082032120395e-05, 'clip_range': 0.33830153770430604, 'gae_lambda': 0.8103779152493754}. Best is trial 14 with value: 2567.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 4845`, after every 75 untruncated mini-batches, there will be a truncated mini-batch of size 45\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=4845 and n_envs=1)\n",
      "  warnings.warn(\n",
      "\u001b[32m[I 2022-03-06 11:06:53,988]\u001b[0m Trial 31 finished with value: -1000.0 and parameters: {'n_steps': 4845, 'gamma': 0.9497625748146148, 'learning_rate': 7.350848561393908e-05, 'clip_range': 0.3030216320061226, 'gae_lambda': 0.9024371662346983}. Best is trial 14 with value: 2567.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 4980`, after every 77 untruncated mini-batches, there will be a truncated mini-batch of size 52\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=4980 and n_envs=1)\n",
      "  warnings.warn(\n",
      "\u001b[32m[I 2022-03-06 11:06:54,356]\u001b[0m Trial 32 finished with value: -1000.0 and parameters: {'n_steps': 4980, 'gamma': 0.8989280394784397, 'learning_rate': 6.555938037897706e-05, 'clip_range': 0.3057219514602332, 'gae_lambda': 0.9195491579118608}. Best is trial 14 with value: 2567.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 5608`, after every 87 untruncated mini-batches, there will be a truncated mini-batch of size 40\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=5608 and n_envs=1)\n",
      "  warnings.warn(\n",
      "\u001b[32m[I 2022-03-06 11:06:54,718]\u001b[0m Trial 33 finished with value: -1000.0 and parameters: {'n_steps': 5608, 'gamma': 0.8805322834085713, 'learning_rate': 6.654437493975714e-05, 'clip_range': 0.2783774040571631, 'gae_lambda': 0.9162603550632249}. Best is trial 14 with value: 2567.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 4144`, after every 64 untruncated mini-batches, there will be a truncated mini-batch of size 48\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=4144 and n_envs=1)\n",
      "  warnings.warn(\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\gym_super_mario_bros\\smb_env.py:148: RuntimeWarning: overflow encountered in ubyte_scalars\n",
      "  return (self.ram[0x86] - self.ram[0x071c]) % 256\n",
      "\u001b[32m[I 2022-03-06 12:14:35,341]\u001b[0m Trial 34 finished with value: 68.0 and parameters: {'n_steps': 4144, 'gamma': 0.8179857652702587, 'learning_rate': 4.0948477129129495e-05, 'clip_range': 0.23769426317639702, 'gae_lambda': 0.9823882214189458}. Best is trial 14 with value: 2567.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 5292`, after every 82 untruncated mini-batches, there will be a truncated mini-batch of size 44\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=5292 and n_envs=1)\n",
      "  warnings.warn(\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\gym_super_mario_bros\\smb_env.py:148: RuntimeWarning: overflow encountered in ubyte_scalars\n",
      "  return (self.ram[0x86] - self.ram[0x071c]) % 256\n",
      "\u001b[32m[I 2022-03-06 13:20:26,861]\u001b[0m Trial 35 finished with value: 1594.0 and parameters: {'n_steps': 5292, 'gamma': 0.8715979502300705, 'learning_rate': 8.178268609116757e-05, 'clip_range': 0.32502506720618196, 'gae_lambda': 0.8553321163636568}. Best is trial 14 with value: 2567.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 6254`, after every 97 untruncated mini-batches, there will be a truncated mini-batch of size 46\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=6254 and n_envs=1)\n",
      "  warnings.warn(\n",
      "\u001b[32m[I 2022-03-06 13:20:27,283]\u001b[0m Trial 36 finished with value: -1000.0 and parameters: {'n_steps': 6254, 'gamma': 0.8859273916611884, 'learning_rate': 8.478620176300249e-05, 'clip_range': 0.32944633946690344, 'gae_lambda': 0.8592617834633806}. Best is trial 14 with value: 2567.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 7124`, after every 111 untruncated mini-batches, there will be a truncated mini-batch of size 20\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=7124 and n_envs=1)\n",
      "  warnings.warn(\n",
      "\u001b[32m[I 2022-03-06 13:20:27,602]\u001b[0m Trial 37 finished with value: -1000.0 and parameters: {'n_steps': 7124, 'gamma': 0.8686787810726534, 'learning_rate': 9.674146006034391e-05, 'clip_range': 0.383702348017887, 'gae_lambda': 0.8356521595389348}. Best is trial 14 with value: 2567.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 5223`, after every 81 untruncated mini-batches, there will be a truncated mini-batch of size 39\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=5223 and n_envs=1)\n",
      "  warnings.warn(\n",
      "\u001b[32m[I 2022-03-06 13:20:30,037]\u001b[0m Trial 38 finished with value: -1000.0 and parameters: {'n_steps': 5223, 'gamma': 0.8367093255383361, 'learning_rate': 2.7063686908385612e-05, 'clip_range': 0.35003161845629793, 'gae_lambda': 0.8783892109389828}. Best is trial 14 with value: 2567.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 6765`, after every 105 untruncated mini-batches, there will be a truncated mini-batch of size 45\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=6765 and n_envs=1)\n",
      "  warnings.warn(\n",
      "\u001b[32m[I 2022-03-06 13:20:30,426]\u001b[0m Trial 39 finished with value: -1000.0 and parameters: {'n_steps': 6765, 'gamma': 0.8492519001392479, 'learning_rate': 5.9515618438873566e-05, 'clip_range': 0.13342223622252922, 'gae_lambda': 0.8542199432378883}. Best is trial 14 with value: 2567.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 7719`, after every 120 untruncated mini-batches, there will be a truncated mini-batch of size 39\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=7719 and n_envs=1)\n",
      "  warnings.warn(\n",
      "\u001b[32m[I 2022-03-06 13:20:30,795]\u001b[0m Trial 40 finished with value: -1000.0 and parameters: {'n_steps': 7719, 'gamma': 0.8661127771496909, 'learning_rate': 1.3503877759986288e-05, 'clip_range': 0.3739543280470977, 'gae_lambda': 0.8344792352828255}. Best is trial 14 with value: 2567.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 4217`, after every 65 untruncated mini-batches, there will be a truncated mini-batch of size 57\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=4217 and n_envs=1)\n",
      "  warnings.warn(\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\gym_super_mario_bros\\smb_env.py:148: RuntimeWarning: overflow encountered in ubyte_scalars\n",
      "  return (self.ram[0x86] - self.ram[0x071c]) % 256\n",
      "\u001b[32m[I 2022-03-06 14:15:43,677]\u001b[0m Trial 41 finished with value: 1705.0 and parameters: {'n_steps': 4217, 'gamma': 0.8910212479248375, 'learning_rate': 7.071169471365639e-05, 'clip_range': 0.30333055491748045, 'gae_lambda': 0.8944223022204731}. Best is trial 14 with value: 2567.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 5717`, after every 89 untruncated mini-batches, there will be a truncated mini-batch of size 21\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=5717 and n_envs=1)\n",
      "  warnings.warn(\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\gym_super_mario_bros\\smb_env.py:148: RuntimeWarning: overflow encountered in ubyte_scalars\n",
      "  return (self.ram[0x86] - self.ram[0x071c]) % 256\n",
      "\u001b[32m[I 2022-03-06 15:10:14,160]\u001b[0m Trial 42 finished with value: 1867.0 and parameters: {'n_steps': 5717, 'gamma': 0.8917316983578463, 'learning_rate': 8.694443860101002e-05, 'clip_range': 0.3129778848436826, 'gae_lambda': 0.8935165868170086}. Best is trial 14 with value: 2567.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 3047`, after every 47 untruncated mini-batches, there will be a truncated mini-batch of size 39\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=3047 and n_envs=1)\n",
      "  warnings.warn(\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\gym_super_mario_bros\\smb_env.py:148: RuntimeWarning: overflow encountered in ubyte_scalars\n",
      "  return (self.ram[0x86] - self.ram[0x071c]) % 256\n",
      "\u001b[32m[I 2022-03-06 16:06:30,156]\u001b[0m Trial 43 finished with value: 2410.0 and parameters: {'n_steps': 3047, 'gamma': 0.9130726404574275, 'learning_rate': 8.670916257195491e-05, 'clip_range': 0.3091383426627936, 'gae_lambda': 0.894284800413888}. Best is trial 14 with value: 2567.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 2616`, after every 40 untruncated mini-batches, there will be a truncated mini-batch of size 56\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=2616 and n_envs=1)\n",
      "  warnings.warn(\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\gym_super_mario_bros\\smb_env.py:148: RuntimeWarning: overflow encountered in ubyte_scalars\n",
      "  return (self.ram[0x86] - self.ram[0x071c]) % 256\n",
      "\u001b[32m[I 2022-03-06 17:01:39,875]\u001b[0m Trial 44 finished with value: 1659.0 and parameters: {'n_steps': 2616, 'gamma': 0.9101817033589411, 'learning_rate': 9.029896760108062e-05, 'clip_range': 0.26986716597499794, 'gae_lambda': 0.900895353168195}. Best is trial 14 with value: 2567.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 3106`, after every 48 untruncated mini-batches, there will be a truncated mini-batch of size 34\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=3106 and n_envs=1)\n",
      "  warnings.warn(\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\gym_super_mario_bros\\smb_env.py:148: RuntimeWarning: overflow encountered in ubyte_scalars\n",
      "  return (self.ram[0x86] - self.ram[0x071c]) % 256\n",
      "\u001b[32m[I 2022-03-06 17:59:13,182]\u001b[0m Trial 45 finished with value: 2398.0 and parameters: {'n_steps': 3106, 'gamma': 0.9259656333077082, 'learning_rate': 7.262116275629746e-05, 'clip_range': 0.30092012888720676, 'gae_lambda': 0.888936283078263}. Best is trial 14 with value: 2567.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 3124`, after every 48 untruncated mini-batches, there will be a truncated mini-batch of size 52\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=3124 and n_envs=1)\n",
      "  warnings.warn(\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\gym_super_mario_bros\\smb_env.py:148: RuntimeWarning: overflow encountered in ubyte_scalars\n",
      "  return (self.ram[0x86] - self.ram[0x071c]) % 256\n",
      "\u001b[32m[I 2022-03-06 18:54:01,696]\u001b[0m Trial 46 finished with value: 741.0 and parameters: {'n_steps': 3124, 'gamma': 0.9253530260172628, 'learning_rate': 9.001470363619922e-05, 'clip_range': 0.2531855599229427, 'gae_lambda': 0.9402516744853799}. Best is trial 14 with value: 2567.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 3356`, after every 52 untruncated mini-batches, there will be a truncated mini-batch of size 28\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=3356 and n_envs=1)\n",
      "  warnings.warn(\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\gym_super_mario_bros\\smb_env.py:148: RuntimeWarning: overflow encountered in ubyte_scalars\n",
      "  return (self.ram[0x86] - self.ram[0x071c]) % 256\n",
      "\u001b[32m[I 2022-03-06 19:59:37,365]\u001b[0m Trial 47 finished with value: 370.0 and parameters: {'n_steps': 3356, 'gamma': 0.9353306931402269, 'learning_rate': 5.0769768457077954e-05, 'clip_range': 0.3476976808627066, 'gae_lambda': 0.8853348511690049}. Best is trial 14 with value: 2567.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 2699`, after every 42 untruncated mini-batches, there will be a truncated mini-batch of size 11\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=2699 and n_envs=1)\n",
      "  warnings.warn(\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\gym_super_mario_bros\\smb_env.py:148: RuntimeWarning: overflow encountered in ubyte_scalars\n",
      "  return (self.ram[0x86] - self.ram[0x071c]) % 256\n",
      "\u001b[32m[I 2022-03-06 20:54:48,573]\u001b[0m Trial 48 finished with value: 737.0 and parameters: {'n_steps': 2699, 'gamma': 0.9199946203956523, 'learning_rate': 2.1585180520246142e-05, 'clip_range': 0.3128908757144187, 'gae_lambda': 0.8747731631940737}. Best is trial 14 with value: 2567.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 2315`, after every 36 untruncated mini-batches, there will be a truncated mini-batch of size 11\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=2315 and n_envs=1)\n",
      "  warnings.warn(\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\gym_super_mario_bros\\smb_env.py:148: RuntimeWarning: overflow encountered in ubyte_scalars\n",
      "  return (self.ram[0x86] - self.ram[0x071c]) % 256\n",
      "\u001b[32m[I 2022-03-06 21:49:36,359]\u001b[0m Trial 49 finished with value: 741.0 and parameters: {'n_steps': 2315, 'gamma': 0.9712910946570741, 'learning_rate': 1.0190793784218886e-05, 'clip_range': 0.2920879754217268, 'gae_lambda': 0.9090505238425378}. Best is trial 14 with value: 2567.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 2756`, after every 43 untruncated mini-batches, there will be a truncated mini-batch of size 4\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=2756 and n_envs=1)\n",
      "  warnings.warn(\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\gym_super_mario_bros\\smb_env.py:148: RuntimeWarning: overflow encountered in ubyte_scalars\n",
      "  return (self.ram[0x86] - self.ram[0x071c]) % 256\n",
      "\u001b[32m[I 2022-03-06 22:44:31,009]\u001b[0m Trial 50 finished with value: 1435.0 and parameters: {'n_steps': 2756, 'gamma': 0.9068218464259754, 'learning_rate': 7.50516125450976e-05, 'clip_range': 0.2796528411600291, 'gae_lambda': 0.9592816120704681}. Best is trial 14 with value: 2567.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 4080`, after every 63 untruncated mini-batches, there will be a truncated mini-batch of size 48\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=4080 and n_envs=1)\n",
      "  warnings.warn(\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\gym_super_mario_bros\\smb_env.py:148: RuntimeWarning: overflow encountered in ubyte_scalars\n",
      "  return (self.ram[0x86] - self.ram[0x071c]) % 256\n",
      "\u001b[32m[I 2022-03-06 23:44:41,589]\u001b[0m Trial 51 finished with value: 609.0 and parameters: {'n_steps': 4080, 'gamma': 0.8938795663231619, 'learning_rate': 7.073532219183787e-05, 'clip_range': 0.3051880043309865, 'gae_lambda': 0.8952384969338864}. Best is trial 14 with value: 2567.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 3543`, after every 55 untruncated mini-batches, there will be a truncated mini-batch of size 23\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=3543 and n_envs=1)\n",
      "  warnings.warn(\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\gym_super_mario_bros\\smb_env.py:148: RuntimeWarning: overflow encountered in ubyte_scalars\n",
      "  return (self.ram[0x86] - self.ram[0x071c]) % 256\n",
      "\u001b[32m[I 2022-03-07 00:45:44,615]\u001b[0m Trial 52 finished with value: 817.0 and parameters: {'n_steps': 3543, 'gamma': 0.938062277318344, 'learning_rate': 8.527079055349473e-05, 'clip_range': 0.29767593059723013, 'gae_lambda': 0.8930319851161982}. Best is trial 14 with value: 2567.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 2985`, after every 46 untruncated mini-batches, there will be a truncated mini-batch of size 41\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=2985 and n_envs=1)\n",
      "  warnings.warn(\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\gym_super_mario_bros\\smb_env.py:148: RuntimeWarning: overflow encountered in ubyte_scalars\n",
      "  return (self.ram[0x86] - self.ram[0x071c]) % 256\n",
      "\u001b[32m[I 2022-03-07 01:40:41,574]\u001b[0m Trial 53 finished with value: 741.0 and parameters: {'n_steps': 2985, 'gamma': 0.8912426702572961, 'learning_rate': 1.9428380443025503e-05, 'clip_range': 0.3145128111725325, 'gae_lambda': 0.9117202164037068}. Best is trial 14 with value: 2567.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 4075`, after every 63 untruncated mini-batches, there will be a truncated mini-batch of size 43\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=4075 and n_envs=1)\n",
      "  warnings.warn(\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\gym_super_mario_bros\\smb_env.py:148: RuntimeWarning: overflow encountered in ubyte_scalars\n",
      "  return (self.ram[0x86] - self.ram[0x071c]) % 256\n",
      "\u001b[32m[I 2022-03-07 02:36:50,775]\u001b[0m Trial 54 finished with value: 1412.0 and parameters: {'n_steps': 4075, 'gamma': 0.8772395054379676, 'learning_rate': 9.97788221937896e-05, 'clip_range': 0.243116133893357, 'gae_lambda': 0.8886975820652069}. Best is trial 14 with value: 2567.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 3473`, after every 54 untruncated mini-batches, there will be a truncated mini-batch of size 17\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=3473 and n_envs=1)\n",
      "  warnings.warn(\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\gym_super_mario_bros\\smb_env.py:148: RuntimeWarning: overflow encountered in ubyte_scalars\n",
      "  return (self.ram[0x86] - self.ram[0x071c]) % 256\n",
      "\u001b[32m[I 2022-03-07 03:31:17,590]\u001b[0m Trial 55 finished with value: 678.0 and parameters: {'n_steps': 3473, 'gamma': 0.9156811826636195, 'learning_rate': 6.0431014102858155e-05, 'clip_range': 0.3358459057694886, 'gae_lambda': 0.8728054254907471}. Best is trial 14 with value: 2567.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 3752`, after every 58 untruncated mini-batches, there will be a truncated mini-batch of size 40\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=3752 and n_envs=1)\n",
      "  warnings.warn(\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\gym_super_mario_bros\\smb_env.py:148: RuntimeWarning: overflow encountered in ubyte_scalars\n",
      "  return (self.ram[0x86] - self.ram[0x071c]) % 256\n",
      "\u001b[32m[I 2022-03-07 04:29:47,317]\u001b[0m Trial 56 finished with value: 741.0 and parameters: {'n_steps': 3752, 'gamma': 0.9991392283134757, 'learning_rate': 2.926398853461841e-05, 'clip_range': 0.2668723872536951, 'gae_lambda': 0.9002530476639748}. Best is trial 14 with value: 2567.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 8101`, after every 126 untruncated mini-batches, there will be a truncated mini-batch of size 37\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=8101 and n_envs=1)\n",
      "  warnings.warn(\n",
      "\u001b[32m[I 2022-03-07 04:29:47,782]\u001b[0m Trial 57 finished with value: -1000.0 and parameters: {'n_steps': 8101, 'gamma': 0.9252351599423807, 'learning_rate': 7.095219369358145e-05, 'clip_range': 0.34754181998141076, 'gae_lambda': 0.9222066615734498}. Best is trial 14 with value: 2567.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 2062`, after every 32 untruncated mini-batches, there will be a truncated mini-batch of size 14\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=2062 and n_envs=1)\n",
      "  warnings.warn(\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\gym_super_mario_bros\\smb_env.py:148: RuntimeWarning: overflow encountered in ubyte_scalars\n",
      "  return (self.ram[0x86] - self.ram[0x071c]) % 256\n",
      "\u001b[32m[I 2022-03-07 05:25:07,223]\u001b[0m Trial 58 finished with value: 739.0 and parameters: {'n_steps': 2062, 'gamma': 0.9020161195539482, 'learning_rate': 5.291704930075014e-05, 'clip_range': 0.17012056705828799, 'gae_lambda': 0.8654611717406063}. Best is trial 14 with value: 2567.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 7646`, after every 119 untruncated mini-batches, there will be a truncated mini-batch of size 30\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=7646 and n_envs=1)\n",
      "  warnings.warn(\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\gym_super_mario_bros\\smb_env.py:148: RuntimeWarning: overflow encountered in ubyte_scalars\n",
      "  return (self.ram[0x86] - self.ram[0x071c]) % 256\n",
      "\u001b[32m[I 2022-03-07 06:22:14,943]\u001b[0m Trial 59 finished with value: 366.0 and parameters: {'n_steps': 7646, 'gamma': 0.8952904986521164, 'learning_rate': 7.935265251736014e-05, 'clip_range': 0.36979809309219025, 'gae_lambda': 0.9278952957032285}. Best is trial 14 with value: 2567.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 4284`, after every 66 untruncated mini-batches, there will be a truncated mini-batch of size 60\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=4284 and n_envs=1)\n",
      "  warnings.warn(\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\gym_super_mario_bros\\smb_env.py:148: RuntimeWarning: overflow encountered in ubyte_scalars\n",
      "  return (self.ram[0x86] - self.ram[0x071c]) % 256\n",
      "\u001b[32m[I 2022-03-07 07:16:23,942]\u001b[0m Trial 60 finished with value: 1973.0 and parameters: {'n_steps': 4284, 'gamma': 0.9110040717214379, 'learning_rate': 9.187845575615934e-05, 'clip_range': 0.38829584868019695, 'gae_lambda': 0.9383909586067057}. Best is trial 14 with value: 2567.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 4253`, after every 66 untruncated mini-batches, there will be a truncated mini-batch of size 29\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=4253 and n_envs=1)\n",
      "  warnings.warn(\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\gym_super_mario_bros\\smb_env.py:148: RuntimeWarning: overflow encountered in ubyte_scalars\n",
      "  return (self.ram[0x86] - self.ram[0x071c]) % 256\n",
      "\u001b[32m[I 2022-03-07 08:10:32,320]\u001b[0m Trial 61 finished with value: 2091.0 and parameters: {'n_steps': 4253, 'gamma': 0.9118979212123, 'learning_rate': 9.181930134977773e-05, 'clip_range': 0.3958129256005329, 'gae_lambda': 0.9407869931515331}. Best is trial 14 with value: 2567.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 3851`, after every 60 untruncated mini-batches, there will be a truncated mini-batch of size 11\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=3851 and n_envs=1)\n",
      "  warnings.warn(\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\gym_super_mario_bros\\smb_env.py:148: RuntimeWarning: overflow encountered in ubyte_scalars\n",
      "  return (self.ram[0x86] - self.ram[0x071c]) % 256\n",
      "\u001b[32m[I 2022-03-07 09:03:39,167]\u001b[0m Trial 62 finished with value: 1781.0 and parameters: {'n_steps': 3851, 'gamma': 0.9444287123430885, 'learning_rate': 9.242692961697141e-05, 'clip_range': 0.385874858969981, 'gae_lambda': 0.9481254529686762}. Best is trial 14 with value: 2567.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 4741`, after every 74 untruncated mini-batches, there will be a truncated mini-batch of size 5\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=4741 and n_envs=1)\n",
      "  warnings.warn(\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\gym_super_mario_bros\\smb_env.py:148: RuntimeWarning: overflow encountered in ubyte_scalars\n",
      "  return (self.ram[0x86] - self.ram[0x071c]) % 256\n",
      "\u001b[32m[I 2022-03-07 09:57:21,336]\u001b[0m Trial 63 finished with value: 1251.0 and parameters: {'n_steps': 4741, 'gamma': 0.9131805160908741, 'learning_rate': 8.590622244727483e-05, 'clip_range': 0.39247791699897566, 'gae_lambda': 0.9367227537233231}. Best is trial 14 with value: 2567.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 4336`, after every 67 untruncated mini-batches, there will be a truncated mini-batch of size 48\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=4336 and n_envs=1)\n",
      "  warnings.warn(\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\gym_super_mario_bros\\smb_env.py:148: RuntimeWarning: overflow encountered in ubyte_scalars\n",
      "  return (self.ram[0x86] - self.ram[0x071c]) % 256\n",
      "\u001b[32m[I 2022-03-07 10:51:00,961]\u001b[0m Trial 64 finished with value: 1982.0 and parameters: {'n_steps': 4336, 'gamma': 0.9282630865067841, 'learning_rate': 7.783923589569095e-05, 'clip_range': 0.35870027878509075, 'gae_lambda': 0.9654026903269507}. Best is trial 14 with value: 2567.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 4304`, after every 67 untruncated mini-batches, there will be a truncated mini-batch of size 16\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=4304 and n_envs=1)\n",
      "  warnings.warn(\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\gym_super_mario_bros\\smb_env.py:148: RuntimeWarning: overflow encountered in ubyte_scalars\n",
      "  return (self.ram[0x86] - self.ram[0x071c]) % 256\n",
      "\u001b[32m[I 2022-03-07 11:47:22,362]\u001b[0m Trial 65 finished with value: 1810.0 and parameters: {'n_steps': 4304, 'gamma': 0.9308031858666277, 'learning_rate': 7.753939063488734e-05, 'clip_range': 0.3903683229129698, 'gae_lambda': 0.9690011198678629}. Best is trial 14 with value: 2567.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\gym_super_mario_bros\\smb_env.py:148: RuntimeWarning: overflow encountered in ubyte_scalars\n",
      "  return (self.ram[0x86] - self.ram[0x071c]) % 256\n",
      "\u001b[32m[I 2022-03-07 12:43:17,944]\u001b[0m Trial 66 finished with value: 1114.0 and parameters: {'n_steps': 4416, 'gamma': 0.9198508427303134, 'learning_rate': 9.585231074951509e-05, 'clip_range': 0.36969622050895073, 'gae_lambda': 0.9603887454054212}. Best is trial 14 with value: 2567.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 4686`, after every 73 untruncated mini-batches, there will be a truncated mini-batch of size 14\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=4686 and n_envs=1)\n",
      "  warnings.warn(\n",
      "\u001b[32m[I 2022-03-07 12:43:20,507]\u001b[0m Trial 67 finished with value: -1000.0 and parameters: {'n_steps': 4686, 'gamma': 0.9065770272095743, 'learning_rate': 8.485521201532408e-05, 'clip_range': 0.35883328281978105, 'gae_lambda': 0.9870767693087372}. Best is trial 14 with value: 2567.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 3852`, after every 60 untruncated mini-batches, there will be a truncated mini-batch of size 12\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=3852 and n_envs=1)\n",
      "  warnings.warn(\n",
      "\u001b[32m[I 2022-03-07 12:43:20,965]\u001b[0m Trial 68 finished with value: -1000.0 and parameters: {'n_steps': 3852, 'gamma': 0.9660947503462107, 'learning_rate': 6.684545347927208e-05, 'clip_range': 0.3975154871945571, 'gae_lambda': 0.9473590337028432}. Best is trial 14 with value: 2567.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 3025`, after every 47 untruncated mini-batches, there will be a truncated mini-batch of size 17\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=3025 and n_envs=1)\n",
      "  warnings.warn(\n",
      "\u001b[32m[I 2022-03-07 12:43:21,330]\u001b[0m Trial 69 finished with value: -1000.0 and parameters: {'n_steps': 3025, 'gamma': 0.9440425545223229, 'learning_rate': 7.697714010003478e-05, 'clip_range': 0.3781965701936643, 'gae_lambda': 0.9744532278010128}. Best is trial 14 with value: 2567.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 4966`, after every 77 untruncated mini-batches, there will be a truncated mini-batch of size 38\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=4966 and n_envs=1)\n",
      "  warnings.warn(\n",
      "\u001b[32m[I 2022-03-07 12:43:21,689]\u001b[0m Trial 70 finished with value: -1000.0 and parameters: {'n_steps': 4966, 'gamma': 0.929739075825806, 'learning_rate': 9.306247301308416e-05, 'clip_range': 0.3233766750927538, 'gae_lambda': 0.9668657659837365}. Best is trial 14 with value: 2567.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 7289`, after every 113 untruncated mini-batches, there will be a truncated mini-batch of size 57\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=7289 and n_envs=1)\n",
      "  warnings.warn(\n",
      "\u001b[32m[I 2022-03-07 12:43:22,066]\u001b[0m Trial 71 finished with value: -1000.0 and parameters: {'n_steps': 7289, 'gamma': 0.9199887705573417, 'learning_rate': 8.333947471369042e-05, 'clip_range': 0.35877351323049733, 'gae_lambda': 0.940928042823806}. Best is trial 14 with value: 2567.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 3295`, after every 51 untruncated mini-batches, there will be a truncated mini-batch of size 31\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=3295 and n_envs=1)\n",
      "  warnings.warn(\n",
      "\u001b[32m[I 2022-03-07 12:43:22,414]\u001b[0m Trial 72 finished with value: -1000.0 and parameters: {'n_steps': 3295, 'gamma': 0.9094054304624419, 'learning_rate': 8.860166827670577e-05, 'clip_range': 0.34373727594275516, 'gae_lambda': 0.9122087164680311}. Best is trial 14 with value: 2567.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 5736`, after every 89 untruncated mini-batches, there will be a truncated mini-batch of size 40\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=5736 and n_envs=1)\n",
      "  warnings.warn(\n",
      "\u001b[32m[I 2022-03-07 12:43:22,773]\u001b[0m Trial 73 finished with value: -1000.0 and parameters: {'n_steps': 5736, 'gamma': 0.901398272114272, 'learning_rate': 3.665000611476348e-05, 'clip_range': 0.3817819270355444, 'gae_lambda': 0.8800564166254614}. Best is trial 14 with value: 2567.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 3999`, after every 62 untruncated mini-batches, there will be a truncated mini-batch of size 31\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=3999 and n_envs=1)\n",
      "  warnings.warn(\n",
      "\u001b[32m[I 2022-03-07 12:43:23,122]\u001b[0m Trial 74 finished with value: -1000.0 and parameters: {'n_steps': 3999, 'gamma': 0.9510193896936453, 'learning_rate': 4.71453146311117e-05, 'clip_range': 0.3326893761042739, 'gae_lambda': 0.9533892886782446}. Best is trial 14 with value: 2567.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 5329`, after every 83 untruncated mini-batches, there will be a truncated mini-batch of size 17\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=5329 and n_envs=1)\n",
      "  warnings.warn(\n",
      "\u001b[32m[I 2022-03-07 12:43:23,476]\u001b[0m Trial 75 finished with value: -1000.0 and parameters: {'n_steps': 5329, 'gamma': 0.9377211321263526, 'learning_rate': 6.272369412170341e-05, 'clip_range': 0.35700420876202366, 'gae_lambda': 0.905574952980551}. Best is trial 14 with value: 2567.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 3630`, after every 56 untruncated mini-batches, there will be a truncated mini-batch of size 46\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=3630 and n_envs=1)\n",
      "  warnings.warn(\n",
      "\u001b[32m[I 2022-03-07 12:43:23,810]\u001b[0m Trial 76 finished with value: -1000.0 and parameters: {'n_steps': 3630, 'gamma': 0.8866704755718706, 'learning_rate': 8.038645345908186e-05, 'clip_range': 0.3748044483377487, 'gae_lambda': 0.9330177613244236}. Best is trial 14 with value: 2567.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 2467`, after every 38 untruncated mini-batches, there will be a truncated mini-batch of size 35\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=2467 and n_envs=1)\n",
      "  warnings.warn(\n",
      "\u001b[32m[I 2022-03-07 12:43:24,138]\u001b[0m Trial 77 finished with value: -1000.0 and parameters: {'n_steps': 2467, 'gamma': 0.8149698043361632, 'learning_rate': 7.308499777028299e-05, 'clip_range': 0.3669865968842609, 'gae_lambda': 0.9246444089510732}. Best is trial 14 with value: 2567.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 6887`, after every 107 untruncated mini-batches, there will be a truncated mini-batch of size 39\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=6887 and n_envs=1)\n",
      "  warnings.warn(\n",
      "\u001b[32m[I 2022-03-07 12:43:25,323]\u001b[0m Trial 78 finished with value: -1000.0 and parameters: {'n_steps': 6887, 'gamma': 0.9256247927319848, 'learning_rate': 6.720398188568409e-05, 'clip_range': 0.20686788140345544, 'gae_lambda': 0.8978589937059659}. Best is trial 14 with value: 2567.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 4476`, after every 69 untruncated mini-batches, there will be a truncated mini-batch of size 60\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=4476 and n_envs=1)\n",
      "  warnings.warn(\n",
      "\u001b[32m[I 2022-03-07 12:43:26,988]\u001b[0m Trial 79 finished with value: -1000.0 and parameters: {'n_steps': 4476, 'gamma': 0.9145799940274755, 'learning_rate': 5.6560784139086725e-05, 'clip_range': 0.3194166488145402, 'gae_lambda': 0.8865100317399267}. Best is trial 14 with value: 2567.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 2878`, after every 44 untruncated mini-batches, there will be a truncated mini-batch of size 62\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=2878 and n_envs=1)\n",
      "  warnings.warn(\n",
      "\u001b[32m[I 2022-03-07 12:43:27,394]\u001b[0m Trial 80 finished with value: -1000.0 and parameters: {'n_steps': 2878, 'gamma': 0.8986692119581969, 'learning_rate': 9.879505366108581e-05, 'clip_range': 0.3982265515899643, 'gae_lambda': 0.9178258158310085}. Best is trial 14 with value: 2567.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 4334`, after every 67 untruncated mini-batches, there will be a truncated mini-batch of size 46\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=4334 and n_envs=1)\n",
      "  warnings.warn(\n",
      "\u001b[32m[I 2022-03-07 12:43:27,766]\u001b[0m Trial 81 finished with value: -1000.0 and parameters: {'n_steps': 4334, 'gamma': 0.9314948533052463, 'learning_rate': 7.783653583411524e-05, 'clip_range': 0.387758256637583, 'gae_lambda': 0.9729113862975742}. Best is trial 14 with value: 2567.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 5077`, after every 79 untruncated mini-batches, there will be a truncated mini-batch of size 21\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=5077 and n_envs=1)\n",
      "  warnings.warn(\n",
      "\u001b[32m[I 2022-03-07 12:43:28,141]\u001b[0m Trial 82 finished with value: -1000.0 and parameters: {'n_steps': 5077, 'gamma': 0.92170619784555, 'learning_rate': 8.939025516580943e-05, 'clip_range': 0.3863328195132818, 'gae_lambda': 0.9818780433293143}. Best is trial 14 with value: 2567.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 4327`, after every 67 untruncated mini-batches, there will be a truncated mini-batch of size 39\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=4327 and n_envs=1)\n",
      "  warnings.warn(\n",
      "\u001b[32m[I 2022-03-07 12:43:28,502]\u001b[0m Trial 83 finished with value: -1000.0 and parameters: {'n_steps': 4327, 'gamma': 0.9337755353228524, 'learning_rate': 7.622261945563637e-05, 'clip_range': 0.2871730933072877, 'gae_lambda': 0.9610066711949787}. Best is trial 14 with value: 2567.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 6533`, after every 102 untruncated mini-batches, there will be a truncated mini-batch of size 5\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=6533 and n_envs=1)\n",
      "  warnings.warn(\n",
      "\u001b[32m[I 2022-03-07 12:43:28,948]\u001b[0m Trial 84 finished with value: -1000.0 and parameters: {'n_steps': 6533, 'gamma': 0.9085158467522622, 'learning_rate': 8.121281522875432e-05, 'clip_range': 0.39126660992301704, 'gae_lambda': 0.8893624051914935}. Best is trial 14 with value: 2567.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 4625`, after every 72 untruncated mini-batches, there will be a truncated mini-batch of size 17\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=4625 and n_envs=1)\n",
      "  warnings.warn(\n",
      "\u001b[32m[I 2022-03-07 12:43:29,295]\u001b[0m Trial 85 finished with value: -1000.0 and parameters: {'n_steps': 4625, 'gamma': 0.8605698718273114, 'learning_rate': 7.048398465631227e-05, 'clip_range': 0.3433015832873092, 'gae_lambda': 0.9695118622221114}. Best is trial 14 with value: 2567.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 4841`, after every 75 untruncated mini-batches, there will be a truncated mini-batch of size 41\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=4841 and n_envs=1)\n",
      "  warnings.warn(\n",
      "\u001b[32m[I 2022-03-07 12:43:29,686]\u001b[0m Trial 86 finished with value: -1000.0 and parameters: {'n_steps': 4841, 'gamma': 0.938954732919385, 'learning_rate': 6.246816267553423e-05, 'clip_range': 0.35350731264167246, 'gae_lambda': 0.9551812713270633}. Best is trial 14 with value: 2567.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 6305`, after every 98 untruncated mini-batches, there will be a truncated mini-batch of size 33\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=6305 and n_envs=1)\n",
      "  warnings.warn(\n",
      "\u001b[32m[I 2022-03-07 12:43:30,094]\u001b[0m Trial 87 finished with value: -1000.0 and parameters: {'n_steps': 6305, 'gamma': 0.9301928445475005, 'learning_rate': 9.319240085071255e-05, 'clip_range': 0.30928494742897833, 'gae_lambda': 0.9661982373625526}. Best is trial 14 with value: 2567.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 3958`, after every 61 untruncated mini-batches, there will be a truncated mini-batch of size 54\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=3958 and n_envs=1)\n",
      "  warnings.warn(\n",
      "\u001b[32m[I 2022-03-07 12:43:30,444]\u001b[0m Trial 88 finished with value: -1000.0 and parameters: {'n_steps': 3958, 'gamma': 0.9131379058754291, 'learning_rate': 1.5123735748767847e-05, 'clip_range': 0.29792139725057604, 'gae_lambda': 0.8083185380538527}. Best is trial 14 with value: 2567.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 4237`, after every 66 untruncated mini-batches, there will be a truncated mini-batch of size 13\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=4237 and n_envs=1)\n",
      "  warnings.warn(\n",
      "\u001b[32m[I 2022-03-07 12:43:30,786]\u001b[0m Trial 89 finished with value: -1000.0 and parameters: {'n_steps': 4237, 'gamma': 0.9476420756007728, 'learning_rate': 8.770474336998402e-05, 'clip_range': 0.36585435672305083, 'gae_lambda': 0.8754911305149418}. Best is trial 14 with value: 2567.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 3680`, after every 57 untruncated mini-batches, there will be a truncated mini-batch of size 32\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=3680 and n_envs=1)\n",
      "  warnings.warn(\n",
      "\u001b[32m[I 2022-03-07 12:43:31,127]\u001b[0m Trial 90 finished with value: -1000.0 and parameters: {'n_steps': 3680, 'gamma': 0.9536905845850374, 'learning_rate': 3.151294444820078e-05, 'clip_range': 0.3287997736133169, 'gae_lambda': 0.9060056156526339}. Best is trial 14 with value: 2567.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 3897`, after every 60 untruncated mini-batches, there will be a truncated mini-batch of size 57\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=3897 and n_envs=1)\n",
      "  warnings.warn(\n",
      "\u001b[32m[I 2022-03-07 12:43:31,458]\u001b[0m Trial 91 finished with value: -1000.0 and parameters: {'n_steps': 3897, 'gamma': 0.9637793462125451, 'learning_rate': 9.464484932780914e-05, 'clip_range': 0.380548782086403, 'gae_lambda': 0.9470267184259014}. Best is trial 14 with value: 2567.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 3169`, after every 49 untruncated mini-batches, there will be a truncated mini-batch of size 33\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=3169 and n_envs=1)\n",
      "  warnings.warn(\n",
      "\u001b[32m[I 2022-03-07 12:43:31,824]\u001b[0m Trial 92 finished with value: -1000.0 and parameters: {'n_steps': 3169, 'gamma': 0.9439332474503539, 'learning_rate': 8.270570273501012e-05, 'clip_range': 0.39980963745995707, 'gae_lambda': 0.9444003086399329}. Best is trial 14 with value: 2567.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 3486`, after every 54 untruncated mini-batches, there will be a truncated mini-batch of size 30\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=3486 and n_envs=1)\n",
      "  warnings.warn(\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\gym_super_mario_bros\\smb_env.py:148: RuntimeWarning: overflow encountered in ubyte_scalars\n",
      "  return (self.ram[0x86] - self.ram[0x071c]) % 256\n",
      "\u001b[32m[I 2022-03-07 13:39:49,196]\u001b[0m Trial 93 finished with value: 737.0 and parameters: {'n_steps': 3486, 'gamma': 0.9244683869625897, 'learning_rate': 7.456824427541364e-05, 'clip_range': 0.3897310202345555, 'gae_lambda': 0.934664704724785}. Best is trial 14 with value: 2567.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 4183`, after every 65 untruncated mini-batches, there will be a truncated mini-batch of size 23\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=4183 and n_envs=1)\n",
      "  warnings.warn(\n",
      "\u001b[32m[I 2022-03-07 13:39:49,668]\u001b[0m Trial 94 finished with value: -1000.0 and parameters: {'n_steps': 4183, 'gamma': 0.9179635930982846, 'learning_rate': 9.252473740413898e-05, 'clip_range': 0.3734115643090221, 'gae_lambda': 0.9515731704089543}. Best is trial 14 with value: 2567.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 7570`, after every 118 untruncated mini-batches, there will be a truncated mini-batch of size 18\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=7570 and n_envs=1)\n",
      "  warnings.warn(\n",
      "\u001b[32m[I 2022-03-07 13:39:49,990]\u001b[0m Trial 95 finished with value: -1000.0 and parameters: {'n_steps': 7570, 'gamma': 0.940223763403716, 'learning_rate': 9.960129751925874e-05, 'clip_range': 0.39230441085434237, 'gae_lambda': 0.9813063414240976}. Best is trial 14 with value: 2567.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 3781`, after every 59 untruncated mini-batches, there will be a truncated mini-batch of size 5\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=3781 and n_envs=1)\n",
      "  warnings.warn(\n",
      "\u001b[32m[I 2022-03-07 13:39:50,300]\u001b[0m Trial 96 finished with value: -1000.0 and parameters: {'n_steps': 3781, 'gamma': 0.957740612712586, 'learning_rate': 6.891785485839814e-05, 'clip_range': 0.3831177668599617, 'gae_lambda': 0.8696588937040459}. Best is trial 14 with value: 2567.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 7289`, after every 113 untruncated mini-batches, there will be a truncated mini-batch of size 57\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=7289 and n_envs=1)\n",
      "  warnings.warn(\n",
      "\u001b[32m[I 2022-03-07 13:39:50,600]\u001b[0m Trial 97 finished with value: -1000.0 and parameters: {'n_steps': 7289, 'gamma': 0.9036354446102922, 'learning_rate': 8.606306902643168e-05, 'clip_range': 0.3621827616103255, 'gae_lambda': 0.9651702497650319}. Best is trial 14 with value: 2567.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 4540`, after every 70 untruncated mini-batches, there will be a truncated mini-batch of size 60\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=4540 and n_envs=1)\n",
      "  warnings.warn(\n",
      "\u001b[32m[I 2022-03-07 13:39:50,916]\u001b[0m Trial 98 finished with value: -1000.0 and parameters: {'n_steps': 4540, 'gamma': 0.8811002762722294, 'learning_rate': 7.697653442957914e-05, 'clip_range': 0.31799973814829713, 'gae_lambda': 0.9511194246143116}. Best is trial 14 with value: 2567.0.\u001b[0m\n",
      "D:\\anaconda\\envs\\pytorch\\lib\\site-packages\\stable_baselines3\\ppo\\ppo.py:137: UserWarning: You have specified a mini-batch size of 64, but because the `RolloutBuffer` is of size `n_steps * n_envs = 7943`, after every 124 untruncated mini-batches, there will be a truncated mini-batch of size 7\n",
      "We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.\n",
      "Info: (n_steps=7943 and n_envs=1)\n",
      "  warnings.warn(\n",
      "\u001b[32m[I 2022-03-07 13:39:51,229]\u001b[0m Trial 99 finished with value: -1000.0 and parameters: {'n_steps': 7943, 'gamma': 0.9299061516352495, 'learning_rate': 8.878189408733648e-05, 'clip_range': 0.34159935882843495, 'gae_lambda': 0.9431110753570355}. Best is trial 14 with value: 2567.0.\u001b[0m\n"
     ]
    }
   ],
   "source": [
    "# Search for the PPO hyperparameters that maximise the score returned by optimize_agent.\n",
    "# The sampler is seeded so its random draws are repeatable given the same objective values;\n",
    "# TPESampler is the sampler Optuna already uses by default for a single-objective study.\n",
    "# NOTE(review): the recorded run shows long streaks of trials finishing with -1000.0 in\n",
    "# under a second; presumably optimize_agent swallows an exception -- confirm in that cell.\n",
    "study = optuna.create_study(\n",
    "    direction='maximize',\n",
    "    sampler=optuna.samplers.TPESampler(seed=0),\n",
    ")\n",
    "study.optimize(optimize_agent, n_trials=100)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Show the ten highest-scoring trials together with their sampled hyperparameters.\n",
    "study.trials_dataframe().sort_values('value', ascending=False).head(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'n_steps': 7149,\n",
       " 'gamma': 0.8692871366327747,\n",
       " 'learning_rate': 6.442559213980066e-05,\n",
       " 'clip_range': 0.31688308594665404,\n",
       " 'gae_lambda': 0.8710254680014865}"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Hyperparameters sampled for the highest-scoring trial.\n",
    "study.best_trial.params"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "FrozenTrial(number=14, values=[2567.0], datetime_start=datetime.datetime(2022, 3, 6, 3, 8, 43, 981845), datetime_complete=datetime.datetime(2022, 3, 6, 4, 4, 57, 152303), params={'n_steps': 7149, 'gamma': 0.8692871366327747, 'learning_rate': 6.442559213980066e-05, 'clip_range': 0.31688308594665404, 'gae_lambda': 0.8710254680014865}, distributions={'n_steps': IntUniformDistribution(high=8192, low=2048, step=1), 'gamma': LogUniformDistribution(high=0.9999, low=0.8), 'learning_rate': LogUniformDistribution(high=0.0001, low=1e-05), 'clip_range': UniformDistribution(high=0.4, low=0.1), 'gae_lambda': UniformDistribution(high=0.99, low=0.8)}, user_attrs={}, system_attrs={}, intermediate_values={}, trial_id=14, state=TrialState.COMPLETE, value=None)"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Full record of the best trial: its number, objective value, timings, parameters and distributions.\n",
    "study.best_trial"
   ]
  }
 ],
 "metadata": {
  "interpreter": {
   "hash": "9465aae7e0ab1403d672807d1a0963d86dbda2f584fbe3054c36cf78311c6c77"
  },
  "kernelspec": {
   "display_name": "Python 3.8.11 ('pytorch')",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.12"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
